The dataset provided to you contains data for several websites owned by the same company. The company is asking for your help in deciding what their approach to setting reserve prices should be, and what range of reserve prices they should set for July. The data covers only the actual revenue generated; it is not at bid level. The dataset has the following columns:
QUESTIONS –
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn import tree
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
import graphviz
import seaborn as sns
import math
import matplotlib.pyplot as plt
from matplotlib import pyplot
import seaborn as sns
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
from scipy.stats.mstats import mode
%matplotlib inline
# Set the default size for every matplotlib figure created after this point.
# `figure.figsize` is (width, height) in inches, so plots are wide and short.
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 4
# Load the dataset; the relative path is resolved against the current
# working directory.
data = pd.read_csv('Ascendeum_Dataset2.csv')

# NOTE(review): the bare expressions below only render output when each one is
# the last statement of a notebook cell. Cell boundaries are not visible here,
# so when run as a plain script only `data.info()` prints anything.

# Preview the first and last rows (pandas shows 5 rows by default).
data.head()
data.tail()
# Dimensions of the frame as (rows, columns).
data.shape
# Number of distinct values in each column.
data.nunique()
# Data type of each column.
data.dtypes
# Print a summary of the columns, their non-null counts and dtypes.
data.info()
# Summary statistics: first with pandas' defaults, then restricted to
# object-dtype (typically string) columns.
data.describe()
data.describe(include = 'object')
# Count of null values per column, used to check for missing data.
data.isnull().sum()
There are no missing values in the dataset, so we can continue with our analysis.
# Build an automated profiling report for the whole DataFrame.
# The commented-out line below is the notebook shell command that installs the
# third-party package; uncomment and run it once if the import fails.
# NOTE(review): upstream has renamed `pandas-profiling` to `ydata-profiling`;
# confirm which package/version this environment is expected to use.
#!pip install pandas-profiling[notebook,html]
import pandas_profiling
# As a bare expression, the report is only displayed when this is the last
# statement of a notebook cell.
pandas_profiling.ProfileReport(data)